load(file='./source/in_vitro/tier1_NAMs_apcra_pro_02262024.RData')
load(file='./source/chem/apcra_chem_ad.RData')
asnm.tier1.all.invivo <- read.xlsx('./output/asnm_tier1_all_invivo.xlsx', colNames = TRUE, sheet=1) %>% as.data.table()
seem3 <- fread('./source/exposure/SupTable-all.chem.preds-2018-11-28.txt')
# I tried using this RData from Git but it did not contain any of our DTXSIDs:
#load('./source/exposure/new.chem.preds-2019-12-13.RData')
#seem3 <- chem.preds
colnames(mega.mc5) # this is going to have the hazard information we need
## [1] "spid" "chid" "casn"
## [4] "chnm" "dsstox_substance_id" "code"
## [7] "aeid" "aenm" "m5id"
## [10] "m4id" "bmad" "resp_max"
## [13] "resp_min" "max_mean" "max_mean_conc"
## [16] "max_med" "max_med_conc" "logc_max"
## [19] "logc_min" "cnst" "hill"
## [22] "hcov" "gnls" "gcov"
## [25] "cnst_er" "cnst_aic" "cnst_rmse"
## [28] "cnst_prob" "hill_tp" "hill_tp_sd"
## [31] "hill_ga" "hill_ga_sd" "hill_gw"
## [34] "hill_gw_sd" "hill_er" "hill_er_sd"
## [37] "hill_aic" "hill_rmse" "hill_prob"
## [40] "gnls_tp" "gnls_tp_sd" "gnls_ga"
## [43] "gnls_ga_sd" "gnls_gw" "gnls_gw_sd"
## [46] "gnls_la" "gnls_la_sd" "gnls_lw"
## [49] "gnls_lw_sd" "gnls_er" "gnls_er_sd"
## [52] "gnls_aic" "gnls_rmse" "gnls_prob"
## [55] "nconc" "npts" "nrep"
## [58] "nmed_gtbl" "hitc" "modl"
## [61] "fitc" "coff" "actp"
## [64] "modl_er" "modl_tp" "modl_ga"
## [67] "modl_gw" "modl_la" "modl_lw"
## [70] "modl_rmse" "modl_prob" "modl_acc"
## [73] "modl_acb" "modl_ac10" "resp_unit"
## [76] "conc_unit" "mc6_flags" "flag.length"
## [79] "use.me" "ac50_uM" "acc_uM"
## [82] "asnm"
seem.apcra <- seem3[dsstox_substance_id %in% apcra.list[,DTXSID]] # only 193...
apcra.list[!DTXSID %in% seem.apcra$dsstox_substance_id]
seem.apcra[is.na(seem3.u95) & !is.na(seem3)]
seem.apcra[is.na(seem3.u95)& !is.na(seem3), seem3.u95 := seem3]
seem.apcra[,seem3.log10 := log10(seem3)]
seem.apcra[,seem3.u95.log10 := log10(seem3.u95)]
asnm.tier1.all.invivo$seem3.u95.log10 <- seem.apcra$seem3.u95.log10[match(asnm.tier1.all.invivo$dsstox_substance_id,
seem.apcra$dsstox_substance_id)]
asnm.tier1.all.invivo$seem3.log10 <- seem.apcra$seem3.log10[match(asnm.tier1.all.invivo$dsstox_substance_id,
seem.apcra$dsstox_substance_id)]
asnm.tier1.all.invivo[,ber.targeted := median(aed50.atg,
aed50.bsk,
aed50.ccte,
aed50.nvs, na.rm=TRUE) - seem3.u95.log10]
asnm.tier1.all.invivo[,ber.httr := median(aed50.httr.heparg,
aed50.httr.u2os,
aed50.httr.mcf7, na.rm=TRUE) - seem3.u95.log10]
asnm.tier1.all.invivo[,ber.htpp := ifelse(!is.na(aed50.htpp.u2os), aed50.htpp.u2os - seem3.u95.log10, NA)]
asnm.tier1.all.invivo[,ber.astar := median(aed50.astar.beas2b,
aed50.astar.hek293,
aed50.astar.hepg2,
na.rm=TRUE) - seem3.u95.log10]
asnm.tier1.all.invivo[,ber.med.aed50 := med.aed50 - seem3.u95.log10]
review <-
asnm.tier1.all.invivo[ber.med.aed50 < 4]
asnm.tier1.all.invivo[is.na(ber.med.aed50), c('chnm','med.aed50','seem3.u95.log10')] # these are missing exposure values and/or bioactivity values
asnm.tier1.all.invivo <- asnm.tier1.all.invivo[order(-ber.med.aed50)]
asnm.tier1.all.invivo[ber.med.aed50 < 4 & apcra.pro.only==1,
c('chnm','apcra.pro.only','med.aed50','seem3.u95.log10','ber.med.aed50')] #56 rows
asnm.tier1.all.invivo[grep('silane',chnm),c('chnm','dsstox_substance_id','T0','T4','Call','aqc_iv_pass','aqc_indicator')]
asnm.tier1.all.plot <- melt.data.table(asnm.tier1.all.invivo,
id.vars = c('dsstox_substance_id',
'chnm',
'CASRN',
'apcra.pro.only',
"ber.targeted",
"ber.httr",
"ber.htpp",
"ber.astar",
"ber.med.aed50",
"aqc_indicator"),
measure.vars = c("log.repdose.any.5th",
"aed50.atg",
"aed50.bsk",
"aed50.ccte",
"aed50.nvs",
"aed50.stm",
"aed50.httr.mcf7",
"aed50.httr.u2os",
"aed50.httr.heparg",
"aed50.htpp.u2os",
"aed50.astar.beas2b",
"aed50.astar.hepg2",
"aed50.astar.hek293",
"med.aed50",
"min.aed50",
"seem3.u95.log10",
"seem3.log10"),
variable.name = 'types',
value.name = 'values')
asnm.tier1.all.plot <- asnm.tier1.all.plot[order(-ber.med.aed50)]
ber.all <- ggplot(data=asnm.tier1.all.plot[!is.na(ber.med.aed50)],
aes(x=reorder(factor(chnm),-ber.med.aed50),
y=values,
group = factor(types),
shape = factor(types),
color = factor(types)))+
geom_point(size=3)+
scale_colour_viridis(discrete=TRUE,
name = 'Values',
breaks=c("log.repdose.any.5th", "aed50.atg",
"aed50.bsk", "aed50.ccte", "aed50.nvs", "aed50.stm", "aed50.httr.mcf7",
"aed50.httr.u2os", "aed50.httr.heparg", "aed50.htpp.u2os", "aed50.astar.beas2b",
"aed50.astar.hepg2", "aed50.astar.hek293", "seem3.u95.log10", "seem3.log10"),
labels=c('5th%-ile POD All',
'ATG AED50',
'BSK AED50',
'MEA AED50',
'NVS AED50',
'STM AED50',
'HTTr MCF7 AED50',
'HTTr U2OS AED50',
'HTTr HepaRG AED50',
'HTPP U2OS AED50',
'ASTAR BEAS2B AED50',
'ASTAR HepG2 AED50',
'ASTAR HEK293 AED50',
'SEEM3 U95',
'SEEM MED'
)) +
scale_shape_manual(name = 'Values',
breaks=c("log.repdose.any.5th", "aed50.atg",
"aed50.bsk", "aed50.ccte", "aed50.nvs", "aed50.stm", "aed50.httr.mcf7",
"aed50.httr.u2os", "aed50.httr.heparg", "aed50.htpp.u2os", "aed50.astar.beas2b",
"aed50.astar.hepg2", "aed50.astar.hek293", "seem3.u95.log10", "seem3.log10"),
values=c(15,16,17,18,8,9,15,16,17,18,8,9,15,16,17,18,8,9),
labels=c('5th%-ile POD All',
'ATG AED50',
'BSK AED50',
'MEA AED50',
'NVS AED50',
'STM AED50',
'HTTr MCF7 AED50',
'HTTr U2OS AED50',
'HTTr HepaRG AED50',
'HTPP U2OS AED50',
'ASTAR BEAS2B AED50',
'ASTAR HepG2 AED50',
'ASTAR HEK293 AED50',
'SEEM3 U95',
'SEEM MED'))+
xlab('')+
#ylab('log10-mg/kg/day')+
theme_bw() +
theme(
#axis.text.x = element_text(angle=90, vjust=0.5),
axis.title.y = element_text(size=12, face='bold'),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
legend.position = 'bottom')+
scale_y_continuous(breaks=seq(-13,5,2))+
guides(colour=guide_legend(nrow=5))+
annotate("rect",
ymin = -9,
ymax = 7,
xmin = 134,
xmax = 190,
fill = "red",
alpha = 0.1,
size = 1,
color = "red")+
annotate("text",
x = 188,
y=-11,
label="A",
size=10,
hjust=0,
vjust=1)+
coord_flip()
#+
# theme(axis.text.y = element_text(colour=retro))
ber.all
asnm.tier1.b <- asnm.tier1.all.plot[ber.med.aed50 <4 & aqc_indicator==1 & apcra.pro.only==1]
#unique(asnm.tier1.b$dsstox_substance_id) #13 substances
#retro <- ifelse(asnm.tier1.a$apcra.ret==1, "red","black")
#retro.face <- ifelse(asnm.tier1.a$apcra.ret==1, "italic","plain")
ber.simple <- ggplot(data=asnm.tier1.b[ types %in% c("log.repdose.any.5th",
"med.aed50",
"seem3.log10",
"seem3.u95.log10")],
aes(x=reorder(factor(chnm),-ber.med.aed50),
y=values,
group = factor(types),
shape = factor(types),
color = factor(types)))+
geom_point(size=3)+
scale_colour_manual(values=c('#481567FF',
'#2D708EFF',
'#3CBB75FF',
'#95D840FF'),
name = 'Values',
breaks=c("log.repdose.any.5th",
"med.aed50",
"seem3.u95.log10",
"seem3.log10"),
labels=c('5th%-ile POD All',
"Med AED50",
"SEEM3 U95",
"SEEM3"
)) +
scale_shape_manual(name = 'Values',
breaks=c("log.repdose.any.pod",
"med.aed50",
"seem3.u95.log10",
"seem3.log10"),
labels=c('5th%-ile POD All',
"Med AED50",
"SEEM3 U95",
"SEEM3"),
values=c(15,16,17,18))+
xlab('')+
#ylab('log10-mg/kg/day')+
theme_bw() +
theme(
#axis.text.x = element_text(angle=90, vjust=0.5),
axis.text = element_text(size=10),
axis.title.y=element_text(size=14,face='bold'),
axis.title.x = element_blank(),
legend.position = 'bottom')+
scale_y_continuous(breaks=seq(-9,5,2))+
guides(colour=guide_legend(nrow=2))+
annotate("text",
x = 43,
y=-8,
label="C",
size=10,
hjust=0,
vjust=1)+
coord_flip()
#theme(axis.text.y = element_text(colour=retro, face=retro.face))
ber.simple
bplusc <- plot_grid(ber.complex, ber.simple, rel_heights = c(1,1), rel_widths = c(1,1), nrow=2)
plot_grid(ber.all, bplusc, rel_widths = c(1,1), rel_heights = c(1,1), ncol=2)
layout <- matrix(c(1,1,2,2,2,
1,1,2,2,2,
1,1,3,3,3,
1,1,3,3,3), nrow=4, byrow=TRUE)
# multiplot was obtained from
# http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_%28ggplot2%29/
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
require(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
file.dir <- paste("./output", sep="")
file.name <- paste("/Figure_BER_", Sys.Date(), ".png", sep="")
file.path <- paste(file.dir, file.name, sep="")
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, width = 10000, height = 9000, res=600)
multiplot(ber.all, ber.complex, ber.simple, layout = layout)
dev.off()
## png
## 2
Table1redo <- asnm.tier1.all.invivo[ber.med.aed50< 4 & apcra.pro.only==1 & is.na(log.repdose.any.25th) & aqc_indicator==1, c('dsstox_substance_id','preferred_name','ber.med.aed50', 'log.repdose.any.25th','med.aed50', 'seem3.u95.log10', 'seem3.log10')]
Table1redo <- Table1redo %>% mutate_if(is.numeric, ~round(., 2))
Table1redo
Table2redo <- asnm.tier1.all.invivo[ber.med.aed50 > 3 & med.aed50 > 2 & pod.ratio > -0.5 & aqc_indicator==1, c('dsstox_substance_id','preferred_name','log.repdose.any.5th', 'med.aed50', 'ber.med.aed50', 'pod.ratio', 'aqc_indicator')]
Table2redo <- Table2redo %>% mutate_if(is.numeric, ~round(., 2))
Table2redo
redo_tables <- list('Table1_HighPriorityChems' = as.data.frame(Table1redo),
'Table2_LowPriorityChems' = as.data.frame(Table2redo))
write.xlsx(redo_tables, './output/Draft_Table1_Table2_APCRA_Pro_27May2024.xlsx')
heparg.comp <- asnm.tier1.all.invivo[,c('dsstox_substance_id',
'aed50.httr.heparg',
'aed50.httr.u2os',
'aed50.httr.mcf7',
'med.aed50',
'min.aed50',
'p5.toxval.numeric',
'p25.toxval.numeric'
)]
heparg.comp[, httr.min.aed50.nonheparg := min(aed50.httr.heparg,aed50.httr.mcf7, na.rm = TRUE), by=c('dsstox_substance_id')]
heparg.comp[, httr.heparg.diff := ifelse(!is.na(httr.min.aed50.nonheparg), httr.min.aed50.nonheparg - aed50.httr.heparg, NA), by=c('dsstox_substance_id')]
range(heparg.comp$httr.heparg.diff, na.rm=TRUE) #-3.2 to 0
## [1] -3.199864 0.000000
hist(heparg.comp$httr.heparg.diff, na.rm=TRUE)
heparg.comp.long <- melt.data.table(heparg.comp,
id.vars = c('dsstox_substance_id',
'aed50.httr.heparg'),
measure.vars = c('httr.min.aed50.nonheparg',
'med.aed50',
'p5.toxval.numeric',
'p25.toxval.numeric'),
variable.name = c('POD'),
value.name = c('value'))
heparg.httr <- ggplot(data=heparg.comp.long,
aes(x=aed50.httr.heparg,
y=value))+
geom_point(aes(x=aed50.httr.heparg,
y=value), alpha=0.5, size=2)+
coord_cartesian(xlim=c(-4,6), ylim=c(-4,6))+
theme_bw()+
geom_abline(color='black')+
geom_abline(intercept = 0.5, slope = 1, color="dark gray",
linetype="dashed", size=0.75)+
geom_abline(intercept = -0.5, slope = 1, color="dark gray",
linetype="dashed", size=0.75)+
xlab('HepaRG HTTr AED50, log10-mg/kg/day')+
ylab('POD value, log10-mg/kg/day')+
theme(axis.text = element_text(size=12),
axis.title = element_text(size=14))+
facet_wrap(~POD, scales='fixed')+
theme(strip.background = element_rect(fill='white',size=1),
strip.text = element_text(size=10, color='black',face='bold'))
heparg.httr
file.dir <- paste("./output", sep="")
file.name <- paste("/Figure_HepaRG_", Sys.Date(), ".png", sep="")
file.path <- paste(file.dir, file.name, sep="")
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, width = 4000, height = 4000, res=600)
heparg.httr
dev.off()
ber.v.seem <- asnm.tier1.all.invivo[,c('dsstox_substance_id','ber.med.aed50', 'seem3.u95.log10','seem3.log10', 'med.aed50')]
ber.v.seem[ ,cred.int.size := seem3.u95.log10 - seem3.log10]
ber_seem <- ggplot(data=ber.v.seem,
aes(x=cred.int.size,
y=ber.med.aed50))+
geom_point(aes(x=cred.int.size,
y=ber.med.aed50), alpha=0.5, size=2)+
#coord_cartesian(xlim=c(-4,6), ylim=c(-4,6))+
theme_bw()+
geom_smooth(method='lm',se=FALSE,color='blue',formula=y~x)+
stat_poly_eq(formula=y ~x,
eq.with.lhs = "y~`=`~",
aes(label=paste0("atop(", ..eq.label.., ",", ..rr.label.., ")")),
label.x.npc = 0.8,
label.y.npc = 0.8,
parse=TRUE)+
xlab('SEEM3 U95 - SEEM3 Median, log10-mg/kg/day')+
ylab('BER')+
theme(axis.text = element_text(size=12),
axis.title = element_text(size=14))
ber_seem
file.dir <- paste("./output", sep="")
file.name <- paste("/Figure_BER_v_SEEMCredInt_", Sys.Date(), ".png", sep="")
file.path <- paste(file.dir, file.name, sep="")
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, width = 4000, height = 4000, res=600)
ber_seem
dev.off()
## png
## 2
Dev tox flag will be added to the ER/AR flags as a set for DART prediction flag. The dev tox flag consists of Stemina devTox quick-Predict data and an in silico flag from TEST.
Using Zurlinden et al 2019 for reference chemicals. Can see that the devtox TEST prediction is not necessarily accurate.
devref <- fread('./source/in_vitro/devtox_reference_zurlinden_2019.csv')
devref <- devref[-26,]
devref[1:17, ref:= 'TP']
devref[18:26, ref:= 'FN']
devref[27:42, ref:= 'TN']
devref[,c('DTXSID','PREFERRED_NAME','ref','DEVTOX_TEST_PRED')]
What is the overlap with APCRA 201 substances? Seemingly only one true positive…and not one captured by Stemina (PTU).
devref[DTXSID %in% apcra.list$DTXSID]
Load mc5 for some positive reference chemicals for DART overall (dev, ER, AR)
dart.ref <- c(
'DTXSID5022308', #genistein
'DTXSID3020465', #diethylstilbestrol
'DTXSID7020182', #bisphenol a
'DTXSID7032004', #flutamide
'DTXSID4022361', #vinclozolin
'DTXSID7021239', #retinoic acid
'DTXSID9022524', #thalidomide
'DTXSID2020634', #5-fluorouracil
'DTXSID6025438', #hydroxyurea
'DTXSID1020194' #boric acid
)
Here we pull the DART reference chemical mc5 for Stemina from invitrodb v3.5.
dart.mc5 <- tcplPrepOtpt(tcplLoadData(lvl=5, type='mc',fld='aeid', val=c(1691,1858)))
dart.mc5 <- dart.mc5[dsstox_substance_id %in% dart.ref]
stm <- mega.mc5[asnm=='STM']
#length(unique(stm$dsstox_substance_id)) #66
stm.mc.dtxsids <- unique(stm$dsstox_substance_id) # 6 of these were added as positives from sc that never advanced to mc (from previous processing)
#stm[hitc==1 & is.na(modl_tp)]
#added.dtxsid <- stm[is.na(modl_prob), dsstox_substance_id]
#stm[dsstox_substance_id %in% added.dtxsid, modl_acc := 2]
#stm[dsstox_substance_id %in% added.dtxsid, acc_uM := 100]
# cast data to combine them.
mc5.stm.wide <- dcast.data.table(stm[aeid %in% c(1691,1858)], dsstox_substance_id + chnm ~ aenm,
value.var = 'modl_acc',
fun.aggregate= min)
mc5.dartref.wide <- dcast.data.table(dart.mc5[aeid %in% c(1691,1858)], dsstox_substance_id + chnm ~ aenm,
value.var = 'modl_acc',
fun.aggregate= min)
mc5.stm.wide <- rbind(mc5.stm.wide,
mc5.dartref.wide)
add.wide <- dcast.data.table(sc2.stm[aeid==1691 & !dsstox_substance_id %in% stm.mc.dtxsids & hitc==0], dsstox_substance_id + chnm ~aenm,
value.var='hitc',
fun.aggregate=max)
add.wide[,STM_H9_Viability_norm := NA]
#length(unique(add.wide$dsstox_substance_id)) #133
dev.flag <- rbind(mc5.stm.wide,
add.wide, fill=TRUE)
dev.flag[,stm.cyto.dist := ifelse(!is.na(STM_H9_Viability_norm), STM_H9_Viability_norm - STM_H9_OrnCyssISnorm_ratio_dn, 3 - STM_H9_OrnCyssISnorm_ratio_dn)]
dev.flag[,dev.flag := 0]
dev.flag[!is.na(STM_H9_OrnCyssISnorm_ratio_dn), dev.flag := 1]
dev.flag[,dev.flag.specific := 0]
dev.flag[stm.cyto.dist >0.25, dev.flag.specific := 1]
Add TEST DEV TOX prediction and the applicability domain.
load('./source/chem/apcra_chem_ad.RData')
setnames(test.opera.pred.total, c('DTXSID','PREFERRED_NAME'), c('dsstox_substance_id','chnm'), skip_absent = TRUE)
dev.flag$test.devtox.score <- test.opera.pred.total$DEVTOX_TEST_PRED[match(dev.flag$dsstox_substance_id,
test.opera.pred.total$dsstox_substance_id)]
# replace Inf with NA
for (j in 1:ncol(dev.flag)) set(dev.flag, which(is.infinite(dev.flag[[j]])), j, NA)
# replace - with NA
dev.flag[,c('test.devtox.score')] <- lapply(dev.flag[,c('test.devtox.score')], function(col) as.numeric(gsub("-$|\\,",NA, col)))
dev.flag$aqc_indicator <- ad.tbl$aqc_indicator[match(dev.flag$dsstox_substance_id, ad.tbl$DTXSID)]
dev.flag$apcra.pro.only <- ad.tbl$apcra.pro.only[match(dev.flag$dsstox_substance_id, ad.tbl$DTXSID)]
total.endo.dtxsid <- c(apcra.list$DTXSID, dart.ref)
## here we should really get the dtxsids for cerapp...
cerapp.compara <- merge.data.table(cerapp[,c('CASRN','CHEMICAL_NAME','input_SMILES','InChI_Key',
'Potency_class_2_binding', 'Potency_class_2_agonist', 'Potency_class_2_antagonist','consensus_2_binding', 'consensus_2_agonist','consensus_2_antagonist')],
by.x = 'CASRN',
all.x=TRUE,
compara[, c('dsstox_substance_id','casrn','consensus_binding', 'consensus_agonist','consensus_antagonist')],
by.y='casrn',
all.y=TRUE)
colnames(er) <- paste0('er_model_', colnames(er))
er.cerapp.compara <- merge.data.table(cerapp.compara,
er,
by.x='CASRN',
by.y='er_model_CASRN',
all.x=TRUE)
colnames(ar) <- paste0('ar_model_', colnames(ar))
er.ar.cerapp.compara <- merge.data.table(er.cerapp.compara,
ar,
by.x='CASRN',
by.y='ar_model_CASRN',
all.x=TRUE)
er.ar.apcra <- er.ar.cerapp.compara[dsstox_substance_id %in% total.endo.dtxsid]
col.num <- c('consensus_2_binding', 'consensus_2_agonist', 'consensus_2_antagonist')
er.ar.apcra[, (col.num) := lapply (.SD, as.numeric), .SDcols = col.num ]
# indicate any active result from compara consensus qsar
er.ar.apcra[is.na(consensus_binding & consensus_agonist & consensus_antagonist),compara.flag := 2] # if data not available
er.ar.apcra[consensus_binding==0 & consensus_agonist==0 & consensus_antagonist==0, compara.flag := 0] # if data are all negative
er.ar.apcra[consensus_binding==1|consensus_agonist==1|consensus_antagonist==1, compara.flag := 1] # if any mode is positive
# indicate any active result from cerapp consensus qsar
er.ar.apcra[is.na(consensus_2_agonist & consensus_2_binding & consensus_2_antagonist),cerapp.flag := 2]
er.ar.apcra[consensus_2_agonist==0 & consensus_2_binding==0 & consensus_2_antagonist==0, cerapp.flag := 0]
er.ar.apcra[consensus_2_agonist==1|consensus_2_binding==1|consensus_2_antagonist==1, cerapp.flag := 1]
# create positive, equivocal, and negative code on ToxCast AR model
er.ar.apcra[is.na(ar_model_Agonist_AUC & ar_model_Antagonist_AUC), toxcast.ar.flag := 2] # data not available
er.ar.apcra[ar_model_Agonist_AUC < 0.1 & ar_model_Antagonist_AUC < 0.1|ar_model_Antagonist_AUC > 0.1 & ar_model_Antagonist_Confidence_Score <= 2, toxcast.ar.flag := 0] # grouped equivocals into the negative space
er.ar.apcra[ar_model_Agonist_AUC >= 0.1|ar_model_Antagonist_AUC >= 0.1 & ar_model_Antagonist_Confidence_Score >2,
toxcast.ar.flag := 1]
# create positive, equivocal, and negative code on ToxCast ER model
er.ar.apcra[is.na(er_model_Agonist_AUC & er_model_Antagonist_AUC), toxcast.er.flag := 2] # no data available
er.ar.apcra[er_model_Agonist_AUC< 0.1 & er_model_Antagonist_AUC < 0.1, toxcast.er.flag := 0] # grouped equivocals into the negative space
er.ar.apcra[er_model_Agonist_AUC >= 0.1|er_model_Antagonist_AUC >= 0.1, toxcast.er.flag := 1]
# require toxcast er model data to trump cerapp call
er.ar.apcra[toxcast.er.flag ==2 & cerapp.flag==2, final.er.flag :=0.1] # no data
er.ar.apcra[toxcast.er.flag ==2 & cerapp.flag==0, final.er.flag :=0] # only cerapp data and it's negative
er.ar.apcra[toxcast.er.flag ==2 & cerapp.flag==1, final.er.flag :=0.5] # only cerapp data and it's positive
er.ar.apcra[toxcast.er.flag ==0 & cerapp.flag %in% c(0,1,2), final.er.flag := 0] # er model available and negative
er.ar.apcra[toxcast.er.flag ==1 & cerapp.flag %in% c(0,1,2), final.er.flag := 1] # er model available and positive
# require toxcast ar model data to trump compara call
er.ar.apcra[toxcast.ar.flag ==2 & compara.flag==2, final.ar.flag :=0.1] # no data
er.ar.apcra[toxcast.ar.flag ==2 & compara.flag==0, final.ar.flag :=0] # only cerapp data and it's negative
er.ar.apcra[toxcast.ar.flag ==2 & compara.flag==1, final.ar.flag :=0.5] # only cerapp data and it's positive
er.ar.apcra[toxcast.ar.flag ==0 & compara.flag %in% c(0,1,2), final.ar.flag := 0] # er model available and negative
er.ar.apcra[toxcast.ar.flag ==1 & compara.flag %in% c(0,1,2), final.ar.flag := 1] # er model available and positive
er.ar.apcra[, ed.flag.sum := sum(final.er.flag, final.ar.flag), by=list(dsstox_substance_id)]
er.ar.flag <- er.ar.apcra[,c("dsstox_substance_id","compara.flag","cerapp.flag","toxcast.ar.flag","toxcast.er.flag","final.er.flag", "final.ar.flag","ed.flag.sum")]
load('./output/APCRA_haz_flg_BER.RData')
# add minimum NAM column for comparison
dev.flag$min_nam <- asnm.tier1$col_min[match(dev.flag$dsstox_substance_id,
asnm.tier1$dsstox_substance_id)]
dev.flag[min_nam=='CCTE', min_nam := 'MEA'] # the CCTE vendor is for the acute MEA data from Shafer lab
# define the binary flag for the TEST dev model
dev.flag[test.devtox.score > 0.7, dev.test := 0.5]
dev.flag[test.devtox.score < 0.7, dev.test := 0]
# merge dev and ER/AR flag
dart.flag <- merge.data.table(dev.flag,
er.ar.flag,
by=c('dsstox_substance_id'),
all.x=TRUE,
all.y=TRUE)
# make sure chnm is not missing
setnames(apcra.total, 'DTXSID', 'dsstox_substance_id')
setnames(apcra.total, 'preferred_name', 'chnm')
dart.flag.na <- dart.flag[is.na(chnm)]
dart.flag$chnm <- apcra.total$chnm[match(dart.flag$dsstox_substance_id,
apcra.total$dsstox_substance_id)]
dart.flag$chnm_dev <- dev.flag$chnm[match(dart.flag$dsstox_substance_id,
dev.flag$dsstox_substance_id)]
dart.flag[is.na(chnm), chnm := chnm_dev]
dart.flag[,c('chnm_dev') := NULL]
# add BER
dart.flag$ber.med.aed50 <- asnm.tier1.all.invivo$ber.med.aed50[match(dart.flag$dsstox_substance_id,
asnm.tier1.all.invivo$dsstox_substance_id)]
dart.flag <- dart.flag[order(ber.med.aed50)]
# further refine columns of data available
dart_mat <- dart.flag[apcra.pro.only==1]
dart_mat <- dart_mat[aqc_indicator==1]
dart_mat <- dart_mat[ber.med.aed50 < 4 ]
dart_ref_mat <- unique(dart.flag[dsstox_substance_id %in% dart.ref])
dart_ref_mat$chnm <- dev.flag$chnm[match(dart_ref_mat$dsstox_substance_id,
dev.flag$dsstox_substance_id)]
setnames(dart_mat, c('dev.test',
'dev.flag',
'dev.flag.specific',
'final.er.flag',
'final.ar.flag'),
c('DEV-TEST','DEV','DEV-S','ER','AR'))
setnames(dart_ref_mat, c('dev.test',
'dev.flag',
'dev.flag.specific',
'final.er.flag',
'final.ar.flag'),
c('DEV-TEST','DEV','DEV-S','ER','AR'))
# comprise the main matrices
dart_mat2 <- as.matrix(dart_mat[,c('DEV-TEST','DEV','DEV-S','ER','AR')])
rownames(dart_mat2) <- dart_mat[, chnm] # define rownames as chemical name
dart_ref2 <- as.matrix(dart_ref_mat[,c('DEV-TEST','DEV','DEV-S','ER','AR')])
rownames(dart_ref2) <- dart_ref_mat[, chnm] # define rownames as chemical name
head(dart_ref2)
## DEV-TEST DEV DEV-S ER AR
## Flutamide 0.5 1 1 0 1
## Boric acid (H3BO3) NA 0 0 NA NA
## 5-Fluorouracil NA 1 0 0 0
## Diethylstilbestrol 0.5 0 0 1 1
## Vinclozolin 0.5 0 0 0 1
## Genistein 0.5 1 0 1 0
# annotations for main hm
anno_df <- data.frame(rownames(dart_mat2))
anno_df$min_nam <- dart.flag$min_nam[match(anno_df[,1],
dart.flag$chnm)]
anno_df$chnm <- dart.flag$chnm[match(anno_df[,1],
dart.flag$chnm)]
anno_df$ber <- dart.flag$ber.med.aed50[match(anno_df[,1],
dart.flag$chnm)]
#left_ha <- rowAnnotation(min_nam = anno_text(anno_df$min_nam,
# just='right',
# location=1,
# show_name = FALSE))
right_ha <- rowAnnotation(BER = anno_barplot(anno_df$ber, width = unit(4,'cm')))
# annotations for ref hm
anno_ref <- data.frame(rownames(dart_ref2))
anno_ref$chnm <- dart.flag$chnm[match(anno_ref[,1],
dart.flag$chnm)]
hm_dart <- Heatmap(matrix = dart_mat2,
cluster_columns = FALSE,
cluster_rows=FALSE,
name="log10-mg/kg/day",
#col=col_fun,
na_col='gray',
#col= colors,
col = colorRamp2(breaks = c(0, 0.5, 1),
colors = c("white" , "#3CBB75FF", "#440154FF")),
#col = list(type=c("1" = "#2166ac","0"= "#f7f7f7")),
show_row_names = TRUE,
row_dend_width = unit(3, "cm"),
column_names_max_height = unit(8, "cm"),
column_names_gp = gpar(fontsize = 12),
heatmap_legend_param = list(title = "In silico/In vitro",legend_direction = 'horizontal'),
width=unit(6,'cm'),
height = unit(14, 'cm'),
rect_gp = gpar(col = "gray", lwd = 2),
column_names_side='top',
#left_annotation = left_ha,
right_annotation = right_ha,
row_names_gp=gpar(fontsize=10,fontfamily='sans'))
hm_dart_ref <- Heatmap(matrix = dart_ref2,
cluster_columns = FALSE,
cluster_rows=FALSE,
name="log10-mg/kg/day",
#col=col_fun,
na_col='gray',
#col= colors,
col = colorRamp2(breaks = c(0, 0.5, 1),
colors = c("white" , "#3CBB75FF", "#440154FF")),
#col = list(type=c("1" = "#2166ac","0"= "#f7f7f7")),
show_row_names = TRUE,
#row_dend_width = unit(3, "cm"),
#column_names_max_height = unit(8, "cm"),
column_names_gp = gpar(fontsize = 12),
heatmap_legend_param = list(title = "In silico/In vitro",legend_direction = 'horizontal'),
#width=unit(6,'cm'),
height = unit(4, 'cm'),
rect_gp = gpar(col = "gray", lwd = 2),
column_names_side='top',
row_names_gp=gpar(fontsize=10,fontfamily='sans'))
hm.ed <- draw(hm_dart %v% hm_dart_ref,
heatmap_legend_side='bottom',
ht_gap = unit(1, "cm"))
file.dir <- paste("./output", sep="")
file.name <- paste("/Figure_DART_HM_", Sys.Date(), ".png", sep="")
file.path <- paste(file.dir, file.name, sep="")
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, width = 4000, height = 5000, res=450)
hm.ed
dev.off()
## png
## 2
The BSK data, or BioMAP panel as it is now known, contain endpoints potentially relevant to immunosuppression. We need to annotate the specific endpoints to calculate the potency of potential immunosuppression and the potency of potential “acute toxicity” (or really, cytotoxicity) in these pathophysiological systems.
bsk.aeid <- unique(mega.mc5[asnm=='BSK',c('aeid','aenm')])
bsk.aeid[,cells := tstrsplit(aenm, "_", fixed=TRUE, keep=c(2))]
bsk.aeid[,endpoint := tstrsplit(aenm, "_", fixed=TRUE, keep=c(3))]
bsk.aeid[,direction := tstrsplit(aenm, "_", fixed=TRUE, keep=c(4))]
## label signature activity groups
bsk.aeid[,activity := as.character(NA)]
bsk.aeid[grep('SRB_down', aenm),activity := 'cytotoxicity']
bsk.aeid[grep('_3C_Proliferation_down',aenm), activity := 'liver toxicity'] #endothelial cell signal
bsk.aeid[grep('SAg_Proliferation_down',aenm), activity := 'immunosuppression'] # decreased T-cells
bsk.aeid[grep('BT_sIgG_down', aenm), activity := 'immunosuppression'] # decreased IgG
bsk.aeid[grep('BT_Bcell_Proliferation_down',aenm), activity := 'immunosuppression'] # decreased B cell proliferation
bsk.aeid[grep('PBMCCytotoxicity_down', aenm), activity := 'immunosuppression'] #cytotoxic to peripheral blood mononuclear cells
bsk.aeid[grep('3C_Thrombomodulin_up', aenm), activity := 'thrombosis']
# next systems are at 'non-cytotoxic concentrations'
bsk.aeid[grep('LPS_PGE2_up', aenm), activity := 'skin irritation']
bsk.aeid[grep('LPS_TNFa_up', aenm), activity := 'skin irritation']
bsk.aeid[grep('hDFCGF_CollagenIII_down', aenm), activity := 'skin sensitization']
bsk.aeid[grep('hDFCGF_VCAM1_up', aenm), activity := 'skin rash']
bsk.aeid[grep('CASM3C_SAA_up', aenm), activity := 'vascular toxicity']
bsk.aeid[aenm=='BSK_IMphg_IL10_down', activity := 'immunosuppression']
kable(bsk.aeid[activity %in% c('immunosuppression','skin irritation','skin rash','skin sensitization')])
| aeid | aenm | cells | endpoint | direction | activity |
|---|---|---|---|---|---|
| 235 | BSK_hDFCGF_CollagenIII_down | hDFCGF | CollagenIII | down | skin sensitization |
| 258 | BSK_hDFCGF_VCAM1_up | hDFCGF | VCAM1 | up | skin rash |
| 290 | BSK_LPS_PGE2_up | LPS | PGE2 | up | skin irritation |
| 296 | BSK_LPS_TNFa_up | LPS | TNFa | up | skin irritation |
| 313 | BSK_SAg_PBMCCytotoxicity_down | SAg | PBMCCytotoxicity | down | immunosuppression |
| 315 | BSK_SAg_Proliferation_down | SAg | Proliferation | down | immunosuppression |
| 2810 | BSK_BT_Bcell_Proliferation_down | BT | Bcell | Proliferation | immunosuppression |
| 2812 | BSK_BT_PBMCCytotoxicity_down | BT | PBMCCytotoxicity | down | immunosuppression |
| 2814 | BSK_BT_sIgG_down | BT | sIgG | down | immunosuppression |
| 2928 | BSK_IMphg_IL10_down | IMphg | IL10 | down | immunosuppression |
In the BioMAP (BSK) panel, we also ran immunosuppressive drugs as controls. We can use these as reference chemicals, but they are not in the APCRA list of chemicals and so we need to retrieve this from invitrodb v3.5.
Note that in invitrodb v3.5, BSK data are represented as ac50 (modl_ga), but are actually lowest effect concentrations. This modeling choice was related to the low number of concentrations (most often 4), low number of replicates (most often 2), and low top-over-cutoff typically observed.Standard curve-fitting models had resulted in extremely low ac50s and so we chose a lowest effect concentration approach.
immunosupp.drug.dtxsids <- c('DTXSID0020365', # cyclosporin A
'DTXSID3047429', # dexamethasone sodium phosphate
'DTXSID4020119', # azathioprine
'DTXSID4020822' # methotrexate
)
spids <- tcplLoadChem(field='dsstox_substance_id',val=immunosupp.drug.dtxsids) # get sample ids
mc5.bsk.ref <- tcplPrepOtpt(tcplLoadData(lvl=5,type='mc',fld='aeid', val=bsk.aeid$aeid)) # get mc5 data
mc5.bsk.ref <- mc5.bsk.ref[dsstox_substance_id %in% spids$dsstox_substance_id] # narrow mc5 data to immunosuppressive drugs
mc5.bsk.combined <- rbind(mega.mc5[asnm=='BSK'],
mc5.bsk.ref,
fill=TRUE)
bsk.flag <- unique(mc5.bsk.combined[ , list(
total.endpoints.screened = .N, #total number of aeids tested in mc
active.assay.count = as.double(length(which(hitc==1))), # active count
inactive.assay.count = as.double(length(which(hitc==0))), #inactive count
active.percent = round((length(which(hitc==1))/.N)*100,2), #active percent
inactive.percent = round((length(which(hitc==0))/.N)*100,2), #inactive percent
min.log.lec = min(modl_ga, na.rm=TRUE),
med.log.lec = median(modl_ga, na.rm=TRUE),
acute.tox = min(modl_ga[aenm %in% c("BSK_3C_SRB_down",
"BSK_4H_SRB_down",
"BSK_BE3C_SRB_down",
"BSK_CASM3C_SRB_down",
"BSK_hDFCGF_SRB_down",
"BSK_KF3CT_SRB_down",
"BSK_LPS_SRB_down",
"BSK_SAg_SRB_down",
"BSK_MyoF_SRB_down",
"BSK_BF4T_SRB_down",
"BSK_IMphg_SRB_down")], na.rm=TRUE),
skin.irrit.lec = min(modl_ga[aenm=='BSK_LPS_PGE2_up'], na.rm=TRUE),
skin.irrit.spec = (min(modl_ga[aenm %in% c("BSK_3C_SRB_down",
"BSK_4H_SRB_down",
"BSK_BE3C_SRB_down",
"BSK_CASM3C_SRB_down",
"BSK_hDFCGF_SRB_down",
"BSK_KF3CT_SRB_down",
"BSK_LPS_SRB_down",
"BSK_SAg_SRB_down",
"BSK_MyoF_SRB_down",
"BSK_BF4T_SRB_down",
"BSK_IMphg_SRB_down")], na.rm=TRUE)) - (min(modl_ga[aenm=='BSK_LPS_PGE2_up'], na.rm=TRUE)),
skin.rash.lec = min(modl_ga[aenm %in% c('BSK_hDFCGF_VCAM1_up')], na.rm=TRUE),
skin.rash.spec = (min(modl_ga[aenm %in% c("BSK_3C_SRB_down", "BSK_4H_SRB_down", "BSK_BE3C_SRB_down",
"BSK_CASM3C_SRB_down", "BSK_hDFCGF_SRB_down", "BSK_KF3CT_SRB_down",
"BSK_LPS_SRB_down", "BSK_SAg_SRB_down", "BSK_MyoF_SRB_down",
"BSK_BF4T_SRB_down", "BSK_IMphg_SRB_down")], na.rm=TRUE)) - (min(modl_ga[aenm %in% c('BSK_hDFCGF_VCAM1_up')], na.rm=TRUE)),
skin.sens.lec = min(modl_ga[aenm=='BSK_hDFCGF_CollagenIII_down']),
skin.sens.spec = (min(modl_ga[aenm %in% c("BSK_3C_SRB_down", "BSK_4H_SRB_down", "BSK_BE3C_SRB_down",
"BSK_CASM3C_SRB_down", "BSK_hDFCGF_SRB_down", "BSK_KF3CT_SRB_down",
"BSK_LPS_SRB_down", "BSK_SAg_SRB_down", "BSK_MyoF_SRB_down",
"BSK_BF4T_SRB_down", "BSK_IMphg_SRB_down")], na.rm=TRUE)) - (min(modl_ga[aenm %in% c('BSK_hDFCGF_CollagenIII_down')], na.rm=TRUE)),
skin.inflamm.lec = min(modl_ga[aenm %in% c('BSK_hDFCGF_CollagenIII_down','BSK_hDFCGF_VCAM1_up','BSK_LPS_PGE2_up')], na.rm=TRUE),
skin.inflamm.spec = (min(modl_ga[aenm %in% c("BSK_3C_SRB_down", "BSK_4H_SRB_down", "BSK_BE3C_SRB_down",
"BSK_CASM3C_SRB_down", "BSK_hDFCGF_SRB_down", "BSK_KF3CT_SRB_down",
"BSK_LPS_SRB_down", "BSK_SAg_SRB_down", "BSK_MyoF_SRB_down",
"BSK_BF4T_SRB_down", "BSK_IMphg_SRB_down")], na.rm=TRUE)) - (min(modl_ga[aenm %in% c('BSK_hDFCGF_CollagenIII_down','BSK_hDFCGF_VCAM1_up','BSK_LPS_PGE2_up')], na.rm=TRUE)),
immun.sup.lec = min(modl_ga[aenm %in% bsk.aeid[activity=='immunosuppression']$aenm], na.rm=TRUE),
immun.sup.spec = (min(modl_ga[aenm %in% c("BSK_3C_SRB_down", "BSK_4H_SRB_down", "BSK_BE3C_SRB_down",
"BSK_CASM3C_SRB_down", "BSK_hDFCGF_SRB_down", "BSK_KF3CT_SRB_down",
"BSK_LPS_SRB_down", "BSK_SAg_SRB_down", "BSK_MyoF_SRB_down",
"BSK_BF4T_SRB_down", "BSK_IMphg_SRB_down")], na.rm=TRUE)) - (min(modl_ga[aenm %in% c('BSK_3C_Proliferation_down')], na.rm=TRUE))
), by = list(dsstox_substance_id, chnm, casn)]) # could make this by spid because if there are n>1 spid the assay number will mess up
invisible(lapply(names(bsk.flag), function(.name) set(bsk.flag, which(is.infinite(bsk.flag[[.name]])), j=.name, value=NA))) # remove Inf
invisible(lapply(names(bsk.flag), function(.name) set(bsk.flag, which(is.nan(bsk.flag[[.name]])), j=.name, value=NA))) # remove NaN
bsk.flag[,selective := ifelse(!is.na(acute.tox), acute.tox-immun.sup.lec, 3-immun.sup.lec)]
colnames(bsk.flag)
Wrangling the data to visualize and adding the applicability domain information.
bsk.flag2 <- bsk.flag[,c('dsstox_substance_id',
'chnm',
'acute.tox',
'immun.sup.lec',
'selective')]
bsk.flag2$aqc_indicator <- ad.tbl$aqc_indicator[match(bsk.flag2$dsstox_substance_id, ad.tbl$DTXSID)]
bsk.flag2$apcra.pro.only <- ad.tbl$apcra.pro.only[match(bsk.flag2$dsstox_substance_id, ad.tbl$DTXSID)]
# add BER
bsk.flag2$ber.med.aed50 <- asnm.tier1.all.invivo$ber.med.aed50[match(bsk.flag2$dsstox_substance_id,
asnm.tier1.all.invivo$dsstox_substance_id)]
bsk.flag2 <- bsk.flag2[order(ber.med.aed50)]
Examine the pattern of the MEA data. In the end this was not very fruitful.
mea.aeid <- unique(mega.mc5[asnm=='CCTE', c('aeid','aenm')])
mea.aeid[,process := tstrsplit(aenm, "CCTE_Shafer_MEA_", keep=c(2))]
mea.aeid[, direction := str_extract(aenm, "[a-z]{2}$")]
## label signature activity groups
### General Firing Activity
mea.aeid[,activity := as.character(NA)]
mea.aeid[grep('firing', aenm),activity := 'Firing']
mea.aeid[grep('MFR',aenm), activity := 'Firing']
mea.aeid[grep('acute_burst_number',aenm), activity := "Firing"]
mea.aeid[grep('acute_spike_number',aenm), activity := "Firing"]
### Burst structure
mea.aeid[is.na(activity) & grep('burst',aenm), activity := "Bursting"]
mea.aeid[is.na(activity) & grep('spike',aenm), activity := "Bursting"]
### Connectivity
mea.aeid[grep('network', aenm), activity := 'Connectivity']
mea.aeid[grep('cross_correlation', aenm), activity := 'Connectivity']
mea.aeid[grep('synchrony', aenm), activity := 'Connectivity']
### Cytotoxicity
mea.aeid[grep('LDH', aenm), activity := 'Cytotoxicity']
mea.aeid[grep('AB', aenm), activity := 'Cytotoxicity']
mea.aeid <- mea.aeid[order(activity,aeid)]
# write.csv
#write.csv(mea.aeid,"./output/mea_aeid_annotation_17apr2021.csv")
# examine
kable(mea.aeid,
filter='top',
options=list(pagelength=25, autoWidth=FALSE, scrollX=TRUE, initComplete = JS(
"function(settings, json) {",
"$('body').css({'font-family': 'Calibri'});",
"}"
)))
| aeid | aenm | process | direction | activity |
|---|---|---|---|---|
| 2460 | CCTE_Shafer_MEA_acute_burst_duration_mean_dn | acute_burst_duration_mean_dn | dn | Bursting |
| 2461 | CCTE_Shafer_MEA_acute_burst_duration_mean_up | acute_burst_duration_mean_up | up | Bursting |
| 2462 | CCTE_Shafer_MEA_acute_per_burst_spike_number_mean_dn | acute_per_burst_spike_number_mean_dn | dn | Bursting |
| 2463 | CCTE_Shafer_MEA_acute_per_burst_spike_number_mean_up | acute_per_burst_spike_number_mean_up | up | Bursting |
| 2464 | CCTE_Shafer_MEA_acute_interburst_interval_mean_dn | acute_interburst_interval_mean_dn | dn | Bursting |
| 2465 | CCTE_Shafer_MEA_acute_interburst_interval_mean_up | acute_interburst_interval_mean_up | up | Bursting |
| 2466 | CCTE_Shafer_MEA_acute_burst_percentage_mean_dn | acute_burst_percentage_mean_dn | dn | Bursting |
| 2467 | CCTE_Shafer_MEA_acute_burst_percentage_mean_up | acute_burst_percentage_mean_up | up | Bursting |
| 2468 | CCTE_Shafer_MEA_acute_burst_percentage_std_dn | acute_burst_percentage_std_dn | dn | Bursting |
| 2469 | CCTE_Shafer_MEA_acute_burst_percentage_std_up | acute_burst_percentage_std_up | up | Bursting |
| 2470 | CCTE_Shafer_MEA_acute_per_network_burst_spike_number_mean_dn | acute_per_network_burst_spike_number_mean_dn | dn | Connectivity |
| 2471 | CCTE_Shafer_MEA_acute_per_network_burst_spike_number_mean_up | acute_per_network_burst_spike_number_mean_up | up | Connectivity |
| 2472 | CCTE_Shafer_MEA_acute_per_network_burst_spike_number_std_dn | acute_per_network_burst_spike_number_std_dn | dn | Connectivity |
| 2473 | CCTE_Shafer_MEA_acute_per_network_burst_spike_number_std_up | acute_per_network_burst_spike_number_std_up | up | Connectivity |
| 2474 | CCTE_Shafer_MEA_acute_per_network_burst_electrodes_number_mean_dn | acute_per_network_burst_electrodes_number_mean_dn | dn | Connectivity |
| 2475 | CCTE_Shafer_MEA_acute_per_network_burst_electrodes_number_mean_up | acute_per_network_burst_electrodes_number_mean_up | up | Connectivity |
| 2476 | CCTE_Shafer_MEA_acute_network_burst_percentage_dn | acute_network_burst_percentage_dn | dn | Connectivity |
| 2477 | CCTE_Shafer_MEA_acute_network_burst_percentage_up | acute_network_burst_percentage_up | up | Connectivity |
| 2478 | CCTE_Shafer_MEA_acute_cross_correlation_area_dn | acute_cross_correlation_area_dn | dn | Connectivity |
| 2479 | CCTE_Shafer_MEA_acute_cross_correlation_area_up | acute_cross_correlation_area_up | up | Connectivity |
| 2480 | CCTE_Shafer_MEA_acute_cross_correlation_HWHM_dn | acute_cross_correlation_HWHM_dn | dn | Connectivity |
| 2481 | CCTE_Shafer_MEA_acute_cross_correlation_HWHM_up | acute_cross_correlation_HWHM_up | up | Connectivity |
| 2482 | CCTE_Shafer_MEA_acute_synchrony_index_dn | acute_synchrony_index_dn | dn | Connectivity |
| 2483 | CCTE_Shafer_MEA_acute_synchrony_index_up | acute_synchrony_index_up | up | Connectivity |
| 2540 | CCTE_Shafer_MEA_acute_LDH_up | acute_LDH_up | up | Cytotoxicity |
| 2541 | CCTE_Shafer_MEA_acute_AB_dn | acute_AB_dn | dn | Cytotoxicity |
| 2454 | CCTE_Shafer_MEA_acute_spike_number_dn | acute_spike_number_dn | dn | Firing |
| 2455 | CCTE_Shafer_MEA_acute_spike_number_up | acute_spike_number_up | up | Firing |
| 2456 | CCTE_Shafer_MEA_acute_firing_rate_mean_dn | acute_firing_rate_mean_dn | dn | Firing |
| 2457 | CCTE_Shafer_MEA_acute_firing_rate_mean_up | acute_firing_rate_mean_up | up | Firing |
| 2458 | CCTE_Shafer_MEA_acute_burst_number_dn | acute_burst_number_dn | dn | Firing |
| 2459 | CCTE_Shafer_MEA_acute_burst_number_up | acute_burst_number_up | up | Firing |
mea.flag <- unique(mega.mc5[asnm=='CCTE' , list(
total.endpoints.screened = .N, #total number of aeids tested in mc
active.assay.count = as.double(length(which(hitc==1))), # active count
inactive.assay.count = as.double(length(which(hitc==0))), #inactive count
active.percent = round((length(which(hitc==1))/.N)*100,2), #active percent
inactive.percent = round((length(which(hitc==0))/.N)*100,2), #inactive percent
min.log.acc = min(modl_acc, na.rm=TRUE),
med.log.acc = median(modl_acc, na.rm=TRUE),
p5.log.acc = quantile(modl_acc, c(0.05),na.rm=TRUE),
up.ct = as.double(length(which(hitc==1 & aeid %in% mea.aeid[direction=='up']$aeid))),
dn.ct = as.double(length(which(hitc==1 & aeid %in% mea.aeid[direction=='dn']$aeid))),
bursting = min(modl_acc[aeid %in% mea.aeid[activity=='Bursting']$aeid], na.rm=TRUE),
connectivity = min(modl_acc[aeid %in% mea.aeid[activity=='Connectivity']$aeid], na.rm=TRUE),
firing = min(modl_acc[aeid %in% mea.aeid[activity=='Firing']$aeid], na.rm=TRUE),
cytotoxicity = min(modl_acc[aeid %in% mea.aeid[activity=='Cytotoxicity']$aeid], na.rm=TRUE)
), by = list(dsstox_substance_id, chnm, casn, spid)]) # could make this by spid because if there are n>1 spid the assay number will mess up
invisible(lapply(names(mea.flag), function(.name) set(mea.flag, which(is.infinite(mea.flag[[.name]])), j=.name, value=NA))) # remove Inf
invisible(lapply(names(mea.flag), function(.name) set(mea.flag, which(is.nan(mea.flag[[.name]])), j=.name, value=NA))) # remove NaN
mea.flag[,bursting.spec := ifelse(!is.na(cytotoxicity), cytotoxicity - bursting, 3-bursting), by=list(dsstox_substance_id)]
mea.flag[,connectivity.spec := ifelse(!is.na(cytotoxicity), cytotoxicity - connectivity, 3-connectivity), by=list(dsstox_substance_id)]
mea.flag[,firing.spec := ifelse(!is.na(cytotoxicity), cytotoxicity - firing, 3-firing), by=list(dsstox_substance_id)]
mea <- tcplPrepOtpt(tcplLoadData(lvl=5,type='mc',fld='aeid',val=mea.aeid$aeid))
#mea.ref <- mea[ chnm %in% c('Tributyltin chloride','Abamectin','Lindane','beta-Cyfluthrin')]
mea.ref <- mea[dsstox_substance_id %in% c(
'DTXSID2020686', #lindane
'DTXSID3027403', # tributyltin chloride
'DTXSID8023892', # abamectin
'DTXSID8032330', # beta-Cyfluthrin
'DTXSID9058238')] # avermectin B1a
mea.ref.dtxsids <- c(
'DTXSID2020686', #lindane
'DTXSID3027403', # tributyltin chloride
'DTXSID8023892', # abamectin
'DTXSID8032330', # beta-Cyfluthrin
'DTXSID9058238') # avermectin B1a
mea.flag.ref <- unique(mea.ref[ , list(
total.endpoints.screened = .N, #total number of aeids tested in mc
active.assay.count = as.double(length(which(hitc==1))), # active count
inactive.assay.count = as.double(length(which(hitc==0))), #inactive count
active.percent = round((length(which(hitc==1))/.N)*100,2), #active percent
inactive.percent = round((length(which(hitc==0))/.N)*100,2), #inactive percent
min.log.acc = min(modl_acc, na.rm=TRUE),
med.log.acc = median(modl_acc, na.rm=TRUE),
p5.log.acc = quantile(modl_acc, c(0.05),na.rm=TRUE),
up.ct = as.double(length(which(hitc==1 & aeid %in% mea.aeid[direction=='up']$aeid))),
dn.ct = as.double(length(which(hitc==1 & aeid %in% mea.aeid[direction=='dn']$aeid))),
bursting = min(modl_acc[aeid %in% mea.aeid[activity=='Bursting']$aeid], na.rm=TRUE),
connectivity = min(modl_acc[aeid %in% mea.aeid[activity=='Connectivity']$aeid], na.rm=TRUE),
firing = min(modl_acc[aeid %in% mea.aeid[activity=='Firing']$aeid], na.rm=TRUE),
cytotoxicity = min(modl_acc[aeid %in% mea.aeid[activity=='Cytotoxicity']$aeid], na.rm=TRUE)
), by = list(dsstox_substance_id, chnm, casn)]) # could make this by spid because if there are n>1 spid the assay number will mess up
invisible(lapply(names(mea.flag.ref), function(.name) set(mea.flag.ref, which(is.infinite(mea.flag.ref[[.name]])), j=.name, value=NA))) # remove Inf
invisible(lapply(names(mea.flag.ref), function(.name) set(mea.flag.ref, which(is.nan(mea.ref[[.name]])), j=.name, value=NA))) # remove NaN
mea.flag.ref[,bursting.spec := ifelse(!is.na(cytotoxicity), cytotoxicity - bursting, 3-bursting), by=list(dsstox_substance_id)]
mea.flag.ref[,connectivity.spec := ifelse(!is.na(cytotoxicity), cytotoxicity - connectivity, 3-connectivity), by=list(dsstox_substance_id)]
mea.flag.ref[,firing.spec := ifelse(!is.na(cytotoxicity), cytotoxicity - firing, 3-firing), by=list(dsstox_substance_id)]
Importantly, we only let the MEA flag be active if > 3 endpoints in the same direction are positive. The 5th percentile ACC of the positive MEA chemicals will serve as a potency comparator/flag (if most sensitive potency for the chemical).
mea.flag2 <- rbind(mea.flag[,-c('spid')], mea.flag.ref, fill=TRUE)
#mea.flag2[dsstox_substance_id %in% c(
# 'DTXSID2020686', #lindane
# 'DTXSID3027403', # tributyltin chloride - clearly tested in 2 spids
# 'DTXSID8023892', # abamectin
# 'DTXSID8032330', # beta-Cyfluthrin
# 'DTXSID9058238')]
mea.flag3 <- mea.flag2[,c('dsstox_substance_id',
'chnm',
'casn',
'total.endpoints.screened',
'up.ct',
'dn.ct',
'p5.log.acc',
'firing',
'bursting',
'connectivity',
'cytotoxicity')]
# should filter based on hitc sum
# should require at least 3-4 to be positive in a single direction
# edit the flag on this basis, then flag becomes the min potency in acute MEA if it is the min potency for the chemical
# consider 5th percentile value of potency after filtering
mea.flag3[, mea.call := 0]
mea.flag3[ up.ct > 3 | dn.ct>3, mea.call := 1]
mea.flag3$min_nam <- asnm.tier1$col_min[match(mea.flag3$dsstox_substance_id,
asnm.tier1$dsstox_substance_id)]
mea.flag3[min_nam=='CCTE', min_nam := 'MEA']
mea.flag3[!min_nam=='MEA', mea.call := 0]
This is specific to APCRA chemicals and their data. We are going to add this to an existing table with all columns. Then, we will make a streamlined version of the table for supplement/sharing.
colnames(asnm.tier1.all.invivo)
## [1] "dsstox_substance_id"
## [2] "aed50.3compss.5pacc"
## [3] "aed50.3compss.atg"
## [4] "aed50.3compss.bsk"
## [5] "aed50.3compss.ccte"
## [6] "aed50.3compss.nvs"
## [7] "aed50.3compss.stm"
## [8] "aed50.3compss.httr.mcf7.sigbmd"
## [9] "aed50.3compss.httr.u2os.sigbmd"
## [10] "aed50.3compss.httr.heparg.sigbmd"
## [11] "aed50.3compss.htpp.u2os"
## [12] "aed50.3compss.astar.beas2b"
## [13] "aed50.3compss.astar.hepg2"
## [14] "aed50.3compss.astar.hek293"
## [15] "aed50.pbtk.5pacc"
## [16] "aed50.pbtk.atg"
## [17] "aed50.pbtk.bsk"
## [18] "aed50.pbtk.ccte"
## [19] "aed50.pbtk.nvs"
## [20] "aed50.pbtk.stm"
## [21] "aed50.pbtk.httr.mcf7.sigbmd"
## [22] "aed50.pbtk.httr.u2os.sigbmd"
## [23] "aed50.pbtk.httr.heparg.sigbmd"
## [24] "aed50.pbtk.htpp.u2os"
## [25] "aed50.pbtk.astar.beas2b"
## [26] "aed50.pbtk.astar.hepg2"
## [27] "aed50.pbtk.astar.hek293"
## [28] "the.day"
## [29] "aed50.atg"
## [30] "aed50.bsk"
## [31] "aed50.ccte"
## [32] "aed50.nvs"
## [33] "aed50.stm"
## [34] "aed50.httr.mcf7"
## [35] "aed50.httr.u2os"
## [36] "aed50.httr.heparg"
## [37] "aed50.htpp.u2os"
## [38] "aed50.astar.beas2b"
## [39] "aed50.astar.hepg2"
## [40] "aed50.astar.hek293"
## [41] "p5.toxval.numeric"
## [42] "p10.toxval.numeric"
## [43] "p15.toxval.numeric"
## [44] "p20.toxval.numeric"
## [45] "p25.toxval.numeric"
## [46] "p30.toxval.numeric"
## [47] "p5.toxval.numeric.sub"
## [48] "p10.toxval.numeric.sub"
## [49] "p15.toxval.numeric.sub"
## [50] "p20.toxval.numeric.sub"
## [51] "p25.toxval.numeric.sub"
## [52] "p30.toxval.numeric.sub"
## [53] "CASRN"
## [54] "preferred_name"
## [55] "apcra.pro.only"
## [56] "T0"
## [57] "T4"
## [58] "Call"
## [59] "aqc_iv_pass"
## [60] "aqc_indicator"
## [61] "AVERAGE_MASS"
## [62] "log10VP"
## [63] "OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"
## [64] "logP.indicator"
## [65] "mw.indicator"
## [66] "logVP.indicator"
## [67] "half.life"
## [68] "mlr.toxval25.aed50"
## [69] "med.aed50"
## [70] "min.aed50"
## [71] "data_poor_binary"
## [72] "ECHA_min_systemic_POD_mkd"
## [73] "log.ECHA_min_systemic_POD_mkd"
## [74] "log.repdose.90d.5th"
## [75] "log.repdose.any.5th"
## [76] "log.repdose.90d.25th"
## [77] "log.repdose.any.25th"
## [78] "ug.kg.bw.day"
## [79] "ttc"
## [80] "chnm"
## [81] "pod.ratio"
## [82] "ttc.ratio"
## [83] "sub.ratio"
## [84] "pod.ratio.size"
## [85] "seem3.u95.log10"
## [86] "seem3.log10"
## [87] "ber.targeted"
## [88] "ber.httr"
## [89] "ber.htpp"
## [90] "ber.astar"
## [91] "ber.med.aed50"
apcra.tbl.draft <- merge.data.table(asnm.tier1.all.invivo,
asnm.tier1.all.ratios[,c('dsstox_substance_id',
'pod.ratio5',
'pod.ratio25'
)],
by='dsstox_substance_id',
all.x=TRUE)
Add in critical elements of the DART flag table.
apcra.tbl.draft <- merge.data.table(apcra.tbl.draft,
dart.flag[,c('dsstox_substance_id',
'chnm',
'min_nam',
"dev.test",
"dev.flag",
"dev.flag.specific",
"final.er.flag",
"final.ar.flag")],
by='dsstox_substance_id',
all.x=TRUE)
Add in critical elements of the target organ system flag table.
apcra.tbl.draft <- merge.data.table(apcra.tbl.draft,
all.organs.flag[,c(
"dsstox_substance_id",
"mea_avail",
"MEA_p5_potency",
"MEA_call",
"MEA_firing",
"MEA_bursting",
"MEA_connectivity",
"MEA_cytotoxicity",
"BioMAP_acute",
"BioMAP_immunosupp",
"BioMAP_immuno_sel",
"HIPPTox_Lung",
"HIPPTox_Liver",
"HIPPTox_Kidney")],
by='dsstox_substance_id',
all.x=TRUE)
Calculate the standard deviation of the AED50s by assay source, excluding HIPPTox.
apcra.tbl.draft <- apcra.tbl.draft %>% rowwise() %>% mutate(aed50_sd=sd(c(aed50.atg, aed50.bsk, aed50.ccte, aed50.nvs, aed50.stm, aed50.httr.mcf7,aed50.httr.u2os, aed50.httr.heparg, aed50.htpp.u2os),na.rm=TRUE)) %>% data.table()
#duplicated(apcra.tbl.draft)
apcra.tbl.draft <- unique(apcra.tbl.draft)
#apcra.tbl.draft[c(26:27),]
#apcra.tbl.draft <- apcra.tbl.draft[-c(27),] # remove replicate endosulfan row - give preference to MEA call = 1
Make a reduced table that is easier to navigate.
dput(colnames(apcra.tbl.draft))
apcra.tbl.select <- apcra.tbl.reduced[, c("dsstox_substance_id",
"CASRN",
"preferred_name",
"chnm",
"apcra.pro.only",
"T0", "T4", "Call", "aqc_iv_pass", "aqc_indicator",
"AVERAGE_MASS", "log10VP", "OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED",
"the.day",
"aed50_sd",
"med.aed50",
"log.p5.toxval.pod",
"log.p5.toxval.sub",
"log.ECHA_min_systemic_POD_mkd", "log.repdose.90d.pod", "log.repdose.any.pod",
"ug.kg.bw.day", "ttc",
"pod.ratio", "ttc.ratio", "sub.ratio", "pod.ratio.size",
"seem3.u95.log10",
"seem3.log10",
"ber.targeted",
"ber.httr",
"ber.htpp",
"ber.astar",
"ber.med.aed50",
"MEA_call",
"BioMAP_immunosupp",
"BioMAP_immuno_sel",
"HIPPTox_Lung",
"HIPPTox_Liver",
"HIPPTox_Kidney") ]
This is to include all reference chemicals associated with each flag
save(ad.tbl,
#apcra.tbl.reduced,
#apcra.tbl.select,
apcra.tbl.draft,
asnm.tier1.all.ratios,
dart.flag,
er.ar.apcra,
dev.flag,
all.organs.flag,
bsk.flag2,
mea.flag3,
file='./output/APCRA_haz_flg_BER.RData')
list_data<- list("full_apcra_tbl" = as.data.frame(apcra.tbl.draft),
"pod_ratios_apcra_tbl" = as.data.frame(asnm.tier1.all.ratios),
"ad.tbl" = as.data.frame(ad.tbl),
"dart.flag" = as.data.frame(dart.flag),
"er.ar.apcra" = as.data.frame(er.ar.apcra),
"dev.flag"= as.data.frame(dev.flag),
"all.organs.flag" = as.data.frame(all.organs.flag),
"BioMAP.immune.flag" = as.data.frame(bsk.flag2),
"MEA.neuro.flag" = as.data.frame(mea.flag3))
write.xlsx(list_data, './output/SuppFile_APCRA_haz_flg_BER_May2024.xlsx')
print(sessionInfo())
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] parallel grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ComplexHeatmap_2.14.0 circlize_0.4.15 viridis_0.6.2
## [4] viridisLite_0.4.1 tcpl_3.1.0 tidyr_1.3.0
## [7] tictoc_1.1 stringr_1.5.1 RMySQL_0.10.25
## [10] DBI_1.2.2 randomForest_4.7-1.1 plotly_4.10.1
## [13] openxlsx_4.2.5.2 jtools_2.2.1 kableExtra_1.3.4
## [16] httk_2.3.0 gridExtra_2.3 gplots_3.1.3
## [19] ggstance_0.3.6 ggpmisc_0.5.2 ggpp_0.5.2
## [22] DT_0.28 dplyr_1.1.1 DescTools_0.99.48
## [25] data.table_1.14.8 cowplot_1.1.1 caret_6.0-94
## [28] lattice_0.21-8 ggplot2_3.4.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.2 systemfonts_1.0.4 plyr_1.8.8
## [4] lazyeval_0.2.2 splines_4.2.2 listenv_0.9.0
## [7] digest_0.6.31 foreach_1.5.2 htmltools_0.5.8.1
## [10] fansi_1.0.4 memoise_2.0.1 magrittr_2.0.3
## [13] cluster_2.1.4 doParallel_1.0.17 confintr_0.2.0
## [16] recipes_1.0.5 globals_0.16.2 gower_1.0.1
## [19] matrixStats_0.63.0 svglite_2.1.1 hardhat_1.3.0
## [22] timechange_0.2.0 colorspace_2.1-0 blob_1.2.4
## [25] rvest_1.0.3 mitools_2.4 rbibutils_2.2.13
## [28] xfun_0.43 crayon_1.5.2 jsonlite_1.8.4
## [31] Exact_3.2 survival_3.5-5 iterators_1.0.14
## [34] glue_1.6.2 gtable_0.3.4 ipred_0.9-14
## [37] webshot_0.5.4 MatrixModels_0.5-1 GetoptLong_1.0.5
## [40] shape_1.4.6 future.apply_1.10.0 BiocGenerics_0.44.0
## [43] SparseM_1.81 scales_1.3.0 mvtnorm_1.1-3
## [46] Rcpp_1.0.10 tcplfit2_0.1.6 clue_0.3-64
## [49] bit_4.0.5 proxy_0.4-27 deSolve_1.35
## [52] sqldf_0.4-11 stats4_4.2.2 lava_1.7.2.1
## [55] survey_4.1-1 prodlim_2023.03.31 htmlwidgets_1.6.4
## [58] httr_1.4.7 RColorBrewer_1.1-3 farver_2.1.1
## [61] pkgconfig_2.0.3 nnet_7.3-18 sass_0.4.9
## [64] utf8_1.2.3 RMariaDB_1.2.2 labeling_0.4.3
## [67] tidyselect_1.2.1 rlang_1.1.0 reshape2_1.4.4
## [70] polynom_1.4-1 munsell_0.5.1 cellranger_1.1.0
## [73] tools_4.2.2 cachem_1.0.7 cli_3.6.1
## [76] gsubfn_0.7 generics_0.1.3 RSQLite_2.3.1
## [79] evaluate_0.23 fastmap_1.1.1 yaml_2.3.7
## [82] ModelMetrics_1.2.2.2 knitr_1.46 bit64_4.0.5
## [85] zip_2.2.2 pander_0.6.5 caTools_1.18.2
## [88] purrr_1.0.1 rootSolve_1.8.2.3 future_1.32.0
## [91] nlme_3.1-162 quantreg_5.95 xml2_1.3.3
## [94] compiler_4.2.2 rstudioapi_0.14 png_0.1-8
## [97] e1071_1.7-13 tibble_3.2.1 bslib_0.7.0
## [100] stringi_1.7.12 highr_0.10 Matrix_1.5-4
## [103] vctrs_0.6.1 msm_1.7 pillar_1.9.0
## [106] lifecycle_1.0.4 GlobalOptions_0.1.2 Rdpack_2.4
## [109] jquerylib_0.1.4 bitops_1.0-7 lmom_2.9
## [112] R6_2.5.1 KernSmooth_2.23-20 IRanges_2.32.0
## [115] parallelly_1.35.0 gld_2.6.6 codetools_0.2-19
## [118] boot_1.3-28.1 MASS_7.3-58.3 gtools_3.9.4
## [121] chron_2.3-60 proto_1.0.0 rjson_0.2.21
## [124] withr_3.0.0 S4Vectors_0.36.2 mgcv_1.8-42
## [127] expm_0.999-7 hms_1.1.3 rpart_4.1.19
## [130] timeDate_4022.108 class_7.3-21 rmarkdown_2.26
## [133] pROC_1.18.0 numDeriv_2016.8-1.1 lubridate_1.9.2